Dependencies

library(dplyr)
library(readr)
library(tibble)
library(forcats)
library(proxy)
library(janitor)
library(metafor)
library(ggplot2)
library(ggstance)
library(scales)
library(knitr)
library(kableExtra)

Data

data_depression <- read_csv("../data/depression - Fried 2017/MatrixB.csv")     #Load dat for estimating Jaccard index (no difference between specific and compound symptoms)

data_anxiety <- read_csv("../data/anxiety - Wall & Lee 2022/anxiety_wall_lee_jaccard.csv")

data_psychosis <- read_csv("../data/psychosis - Bernardin et al. 2023/psychosis - Bernardin et al. 2023.csv") |> 
  janitor::clean_names() %>%
  mutate(across(everything(), ~ ifelse(. == 2, 1, .)))

data_hypomania <- read_csv("../data/hypomania - Chrobak et al. 2018/hypomania.csv") 

data_eatingdisorders <- read_csv("../data/eating disorders - Christensen Pacella et al. 2024/MatrixB_EDMeasures_5-26-2023.csv")

data_youth_ocd <- read_csv("../data/ocd - Visiontay et al. 2019/youth_ocd.csv") |> 
  janitor::clean_names() %>%
  mutate(across(everything(), ~ ifelse(. == 2, 1, .)))

# youth depression

# youth anxiety

Jaccard index interpretation

Real, R., & Vargas, J. M. (1996). The probabilistic basis of Jaccard’s index of similarity. Systematic Biology, 45(3), 380-385. DOI:10.1093/sysbio/45.3.380

  • Jaccard < 0.25: Typically considered low similarity, meaning that the two sets have few elements in common.
  • 0.25 ≤ Jaccard < 0.5: Moderate similarity, which indicates that the sets share some commonalities but still have many differences.
  • 0.5 ≤ Jaccard < 0.75: High similarity, meaning there is substantial overlap between the sets.
  • Jaccard ≥ 0.75: Very high similarity, with most elements being shared between the sets.

3-level meta-analyses by domain

escalc_jaccard <- function(.data, domain = NA){
  
  data_mat_t <- .data |> 
      as.matrix() |> 
      t()
  
  jaccard_diff <- proxy::dist(data_mat_t, method = "Jaccard") |> as.matrix()
  jaccard_sim <- 1 - jaccard_diff
  
  # calculate SEs for each Jaccard similarity
  var_names <- rownames(data_mat_t)
  n_vars <- length(var_names)
  
  # initialize the SE matrix
  se_matrix <- matrix(NA, nrow = n_vars, ncol = n_vars)
  rownames(se_matrix) <- var_names
  colnames(se_matrix) <- var_names
  
  # compute SEs
  for (i in 1:n_vars) {
    for (j in 1:n_vars) {
      x <- data_mat_t[i, ]
      y <- data_mat_t[j, ]
      a <- sum(x == 1 & y == 1)
      b <- sum(x == 1 & y == 0)
      c <- sum(x == 0 & y == 1)
      n <- a + b + c
      J <- a / n
      SE <- sqrt(J * (1 - J) / n)
      se_matrix[i, j] <- SE
    }
  }
  
  # logit transform
  logit_jaccard_sim <- qlogis(jaccard_sim)
  
  # calculate SEs on the logit scale using the delta method
  # SE_logit_p = SE_p / [p * (1 - p)]
  se_logit_jaccard_sim <- se_matrix / (jaccard_sim * (1 - jaccard_sim))
  
  # replace infinite values resulting from division by zero
  se_logit_jaccard_sim[!is.finite(se_logit_jaccard_sim)] <- NA
  
  # create variable pair names
  m <- jaccard_sim
  ## get indices and values
  indices <- which(lower.tri(m), arr.ind = TRUE)
  vec <- m[lower.tri(m)]
  ## create variable pair names
  variable_pairs <- paste(rownames(m)[indices[,1]], colnames(m)[indices[,2]], sep = " - ")
  
  # create a data frame for effect sizes
  dat_es <- tibble(domain = domain,
                   yi = logit_jaccard_sim[lower.tri(logit_jaccard_sim)],
                   sei = se_logit_jaccard_sim[lower.tri(se_logit_jaccard_sim)],
                   scale1 = rownames(m)[indices[,1]],
                   scale2 = colnames(m)[indices[,2]],
                   pair = variable_pairs)
  
  return(dat_es)
}


rma_jaccard_three_level <- function(effect_sizes, title = "Scales"){
  
  # fit a three-level meta-analysis model accounting for shared scales
  fit <- rma.mv(yi = yi, 
                V = sei^2,
                random = list(~ 1 | scale1, ~ 1 | scale2),
                data = effect_sizes,
                method = "REML")
  
  # create a forest plot
  plot_forest <- 
    forest(fit, 
           transf = plogis,
           slab = effect_sizes$pair,
           xlim = c(-0.8, 1.7),
           at = c(0, .2, .4, .6, .8, 1),
           efac = 0.2,
           addpred = TRUE,
           xlab = "Jaccard Similarity",
           header = c(title, "J [95% CI]"),
           refline = NA)
  
  # table of results
  results_predictions <- fit |>
    predict() |>
    as_tibble() |>
    dplyr::select(J = pred, ci_lower = ci.lb, ci_upper = ci.ub, pi_lower = pi.lb, pi_upper = pi.ub) |>
    mutate_all(plogis) |>
    mutate_all(janitor::round_half_up, digits = 2) |>
    mutate(domain = title) |>
    relocate(domain, .before = J)
  
  return(list(fit = fit,
              predictions = results_predictions,
              forest = plot_forest))
}

Depression

es_depression <- data_depression |>
  escalc_jaccard(domain = "Depression") 

res_depression <- es_depression |>
  rma_jaccard_three_level(title = "Depression scales")

res_depression$predictions |>
  kable(align = "r") |>
  kable_classic(full_width = FALSE)
domain J ci_lower ci_upper pi_lower pi_upper
Depression scales 0.43 0.35 0.52 0.24 0.64

Anxiety

es_anxiety <- data_anxiety |>
  escalc_jaccard(domain = "Anxiety") 

res_anxiety <- es_anxiety |>
  rma_jaccard_three_level(title = "Anxiety scales")

res_anxiety$predictions |>
  kable(align = "r") |>
  kable_classic(full_width = FALSE)
domain J ci_lower ci_upper pi_lower pi_upper
Anxiety scales 0.27 0.22 0.32 0.14 0.44

Psychosis

es_psychosis <- data_psychosis |>
  escalc_jaccard(domain = "Psychosis")

res_psychosis <- es_psychosis |>
  rma_jaccard_three_level(title = "Psychosis scales")

res_psychosis$predictions |>
  kable(align = "r") |>
  kable_classic(full_width = FALSE)
domain J ci_lower ci_upper pi_lower pi_upper
Psychosis scales 0.21 0.16 0.26 0.08 0.43

Hypomania

es_hypomania <- data_hypomania |>
  escalc_jaccard(domain = "(Hypo)mania")

res_hypomania <- es_hypomania |>
  rma_jaccard_three_level(title = "(Hypo)mania scales")

res_hypomania$predictions |>
  kable(align = "r") |>
  kable_classic(full_width = FALSE)
domain J ci_lower ci_upper pi_lower pi_upper
(Hypo)mania scales 0.4 0.31 0.5 0.2 0.64

OCD (youth)

es_youth_ocd <- data_youth_ocd |>
  escalc_jaccard(domain = "Youth OCD")

res_youth_ocd <- es_youth_ocd |>
  rma_jaccard_three_level(title = "Youth OCD scales")

res_youth_ocd$predictions |>
  kable(align = "r") |>
  kable_classic(full_width = FALSE)
domain J ci_lower ci_upper pi_lower pi_upper
Youth OCD scales 0.38 0.32 0.44 0.32 0.44

Eating disorders

es_eatingdisorders <- data_eatingdisorders |>
  escalc_jaccard(domain = "OCD")

res_eatingdisorders <- es_eatingdisorders |>
  rma_jaccard_three_level(title = "Eating disorders scales")

res_eatingdisorders$predictions |>
  kable(align = "r") |>
  kable_classic(full_width = FALSE)
domain J ci_lower ci_upper pi_lower pi_upper
Eating disorders scales 0.2 0.14 0.28 0.09 0.39

4-level meta-analyses across domains

es_combined <- bind_rows(
  es_depression,
  es_anxiety,
  es_psychosis,
  es_hypomania,
  es_eatingdisorders,
  es_youth_ocd
)

rma_jaccard_four_level <- function(effect_sizes){
  
  # Fit a four-level meta-analysis model accounting for shared scales
  fit <- rma.mv(yi = yi,
                V = sei^2,
                random = list(~ 1 | domain/scale1, ~ 1 | domain/scale2),
                data = effect_sizes,
                method = "REML")
  
  # Create a forest plot
  plot_forest <- 
    forest(fit, 
           transf = plogis,
           slab = effect_sizes$pair,
           xlim = c(-0.8, 1.7),
           at = c(0, .2, .4, .6, .8, 1),
           #cex = 0.4,
           efac = 0.2,
           addpred = TRUE,
           xlab = "Jaccard Similarity",
           header = c("Scales", "J [95% CI]"),
           refline = NA)
  
  results_predictions <- fit |>
    predict() |>
    as_tibble() |>
    dplyr::select(J = pred, ci_lower = ci.lb, ci_upper = ci.ub, pi_lower = pi.lb, pi_upper = pi.ub) |>
    mutate_all(plogis) |>
    mutate_all(janitor::round_half_up, digits = 2) |>
    mutate(domain = "4-level RE") |>
    relocate(domain, .before = J)
  
  return(list(fit = fit,
              predictions = results_predictions,
              forest = plot_forest))
}

res_combined <- rma_jaccard_four_level(es_combined)

res_combined$predictions |>
  kable(align = "r") |>
  kable_classic(full_width = FALSE)
domain J ci_lower ci_upper pi_lower pi_upper
4-level RE 0.3 0.23 0.39 0.11 0.6
#res_combined$fit
 
# Combine estimates and labels into a data frame for easy viewing
tibble(label = c("domain", "domain/scale1", "domain again", "domain/scale2")) |>
  mutate(logit_sigma = res_combined$fit$sigma2,
         sigma = round_half_up(plogis(logit_sigma), 2)) |>
  filter(label != "domain again") |>
  select(-logit_sigma) |>
  kable(align = "r") |>
  kable_classic(full_width = FALSE)
label sigma
domain 0.52
domain/scale1 0.53
domain/scale2 0.51

Comparions between domains

3- and 4-level meta

predictions_combined_by_domain <- bind_rows(
  res_depression$predictions,
  res_anxiety$predictions,
  res_psychosis$predictions,
  res_hypomania$predictions,
  res_youth_ocd$predictions,
  res_eatingdisorders$predictions,
  res_combined$predictions
) |>
  mutate(domain = fct_rev(fct_relevel(domain,
                                      "Depression scales",
                                      "Anxiety scales",
                                      "Psychosis scales",
                                      "(Hypo)mania scales",
                                      "Youth OCD scales",
                                      "Eating disorders scales",
                                      "4-level RE")))

predictions_combined_by_domain |>
  kable(align = "r") |>
  kable_classic(full_width = FALSE)
domain J ci_lower ci_upper pi_lower pi_upper
Depression scales 0.43 0.35 0.52 0.24 0.64
Anxiety scales 0.27 0.22 0.32 0.14 0.44
Psychosis scales 0.21 0.16 0.26 0.08 0.43
(Hypo)mania scales 0.40 0.31 0.50 0.20 0.64
Youth OCD scales 0.38 0.32 0.44 0.32 0.44
Eating disorders scales 0.20 0.14 0.28 0.09 0.39
4-level RE 0.30 0.23 0.39 0.11 0.60
ggplot(predictions_combined_by_domain, aes(J, domain)) +
  # geom_errorbarh(aes(xmin = pi_lower, xmax = pi_upper), linetype = "dotted", height = 0.26) +
  # geom_errorbarh(aes(xmin = ci_lower, xmax = ci_upper), linetype = "solid", height = 0.26, size = 0.85) +
  geom_linerange(aes(xmin = pi_lower, xmax = pi_upper), linetype = "solid", size = 0.45) +
  geom_linerange(aes(xmin = ci_lower, xmax = ci_upper), linetype = "solid", size = 0.90) +
  geom_point(size = 2.5) +
  theme_linedraw() +
  scale_x_continuous(limits = c(0,1), breaks = scales::breaks_pretty(n = 5), name = "Jaccard Similarity") +
  ylab("")

dir.create("plots")

ggsave("plots/plot.pdf",
       width = 6,
       height = 4)

Session info

sessionInfo()
## R version 4.3.3 (2024-02-29)
## Platform: aarch64-apple-darwin20 (64-bit)
## Running under: macOS Sonoma 14.5
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRblas.0.dylib 
## LAPACK: /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRlapack.dylib;  LAPACK version 3.11.0
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## time zone: Europe/Zurich
## tzcode source: internal
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] kableExtra_1.4.0    knitr_1.48          scales_1.3.0       
##  [4] ggstance_0.3.7      ggplot2_3.5.1       metafor_4.6-0      
##  [7] numDeriv_2016.8-1.1 metadat_1.2-0       Matrix_1.6-5       
## [10] janitor_2.2.0       proxy_0.4-27        forcats_1.0.0      
## [13] tibble_3.2.1        readr_2.1.5         dplyr_1.1.4        
## 
## loaded via a namespace (and not attached):
##  [1] gtable_0.3.5      xfun_0.46         bslib_0.7.0       lattice_0.22-6   
##  [5] mathjaxr_1.6-0    tzdb_0.4.0        vctrs_0.6.5       tools_4.3.3      
##  [9] generics_0.1.3    parallel_4.3.3    fansi_1.0.6       highr_0.11       
## [13] pkgconfig_2.0.3   lifecycle_1.0.4   compiler_4.3.3    farver_2.1.2     
## [17] stringr_1.5.1     textshaping_0.3.7 munsell_0.5.1     snakecase_0.11.1 
## [21] htmltools_0.5.8.1 sass_0.4.9        yaml_2.3.8        pillar_1.9.0     
## [25] crayon_1.5.2      jquerylib_0.1.4   cachem_1.0.8      nlme_3.1-164     
## [29] tidyselect_1.2.1  digest_0.6.35     stringi_1.8.4     fastmap_1.1.1    
## [33] grid_4.3.3        colorspace_2.1-1  cli_3.6.3         magrittr_2.0.3   
## [37] utf8_1.2.4        withr_3.0.1       bit64_4.0.5       lubridate_1.9.3  
## [41] timechange_0.3.0  rmarkdown_2.26    bit_4.0.5         ragg_1.3.0       
## [45] hms_1.1.3         evaluate_0.23     viridisLite_0.4.2 rlang_1.1.4      
## [49] glue_1.7.0        xml2_1.3.6        svglite_2.1.3     rstudioapi_0.16.0
## [53] vroom_1.6.5       jsonlite_1.8.8    R6_2.5.1          systemfonts_1.0.6